This code produces the results for the question: Is there stability across caregivers, regardless of activity?

Load libraries

library(tidyverse)
library(GGally)
library(ppcor)
library(psych)
library(Hmisc)
library(sjPlot)

# https://github.com/ggobi/ggally/issues/139
my_custom_smooth <- function(data, mapping, ...) {
  # Scatter panel with a linear fit; used as the lower-triangle panel in
  # ggpairs() calls below (workaround from ggobi/ggally#139).
  panel <- ggplot(data = data, mapping = mapping)
  panel +
    geom_point(color = I("black"), alpha = .4) +
    geom_smooth(color = I("blue"), method = "lm", ...)
}

Read in data and demographic information

# demographics
# Read and harmonize the demographic files for the two language samples so
# they can be stacked: both end up with columns id, hi, momed, language.
demo_english <- read_csv("./data_demo_lena_transcripts/demo_english_ms.csv") %>% 
  rename(id = ID, hi = HI24Final, momed = Momed) %>% 
  dplyr::select(id, hi, momed) %>% 
  # id as character so the two samples can be row-bound safely
  mutate(id = as.character(id), 
         language = "english")

# the Spanish file uses different source column names for the same measures
demo_spanish <- read_csv("./data_demo_lena_transcripts/demo_spanish_ms.csv") %>% 
  rename(id = ID, hi = HI_18, momed = MomEd_25m) %>% 
  dplyr::select(id, hi, momed) %>% 
  mutate(id = as.character(id), 
         language = "spanish")

# stack the two samples into one demographics table
demo <- rbind(demo_english, demo_spanish)


# NOTE about periods of non-tCDS
# gemods refers to when there are designated start/end periods of other-directed speech (ODS); this was captured using gems (@G) using CHAT conventions
# kwalods refers to when ODS was transcribed at an utterance-level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was captured per utterances using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods


# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'


# freq
# Token/type counts per transcript segment. kwalods rows are excluded because
# durations were captured per activity, not per utterance (see NOTE above);
# only "all" caregiver speech is kept here.
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  filter(activity != "kwalods", 
         speech == "all") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc", "gemods" = "non_tcds")) %>% # we collapsed across play, routines, feeding, and unstructured conversation for the final paper [see supplemental for all categories]
  # collapse the recoded activities within a segment/speaker: durations and
  # tokens are summed, but types are averaged.
  # NOTE(review): summing types would double-count words shared across the
  # collapsed activities -- presumably why mean() is used here; confirm this
  # is the intended aggregation.
  group_by(id, activity, segment_num, speaker) %>% 
  mutate(dur_min2 = sum(dur_min),
         tokens2 = sum(tokens), 
         types2 = mean(types)) %>% 
  distinct(id, activity, language, speaker, segment_num, tokens2, types2, dur_min2) %>% 
  # convert counts to per-minute rates
  mutate(tokens_permin2 = tokens2/dur_min2, 
         types_permin2 = types2/dur_min2) %>% 
  distinct(id, activity, language, speaker, segment_num, tokens_permin2, types_permin2) %>% 
  ungroup() %>% 
  # fix activity level order for plots and correlation matrices below
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")), 
         id = factor(id), 
         language = factor(language)) 




# mlu
# Mean length of utterance (words per utterance) per transcript segment.
# Unlike freq, ODS rows are kept (recoded to non_tcds): per the NOTE above,
# MLU does not depend on durations, so all ODS can be included.
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv") %>% 
  filter(speech == "all") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc", "ods" = "non_tcds")) %>% 
  # collapse the recoded activities within a segment/speaker
  group_by(id, activity, segment_num, speaker) %>% 
  mutate(words_sum2 = sum(words_sum),
         num_utt_sum2 = sum(num_utt_sum)) %>% 
  distinct(id, activity, language, speaker, segment_num, words_sum2, num_utt_sum2) %>% 
  # (a second group_by() with the identical keys was removed here: distinct()
  # preserves grouping, so regrouping was a no-op)
  mutate(mlu_w2 = words_sum2/num_utt_sum2) %>% 
  distinct(id, activity, language, speaker, segment_num, mlu_w2) %>% 
  ungroup() %>% 
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")), 
         id = factor(id), 
         language = factor(language))


# chip
# this includes only caregivers, therefore there is no speaker column
# we exclude periods of ODS because this is about responsiveness to the child during periods of tCDS
# Per segment: adult responses and imitations/expansions as proportions of
# child utterances.
chip <- read_csv("./data_demo_lena_transcripts/chip.csv") %>% 
  filter(activity != "ods") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc")) %>% 
  # collapse the recoded activities within a segment
  group_by(id, activity, segment_num) %>% 
  mutate(total_adult_resp2 = sum(total_adult_resp),
         total_adult_imitexp2 = sum(total_adult_imitexp), 
         total_child_utt2 = sum(total_child_utt)) %>% 
  distinct(id, activity, language, segment_num, total_adult_resp2, total_adult_imitexp2, total_child_utt2) %>%
  # proportions relative to child utterances
  # NOTE(review): if total_child_utt2 is ever 0 these become NaN/Inf --
  # downstream averages use na.rm, but Inf would not be removed; confirm the
  # data cannot contain zero-child-utterance segments.
  mutate(prop_adultresp2 = total_adult_resp2/total_child_utt2, 
         prop_adult_imitexp2 = total_adult_imitexp2/total_child_utt2) %>% 
  distinct(id, activity, segment_num, language, prop_adultresp2, prop_adult_imitexp2) %>% 
  ungroup() %>% 
  # chip has no non_tcds level (ODS excluded above)
  mutate(activity = factor(activity, levels = c("books", "cc", "ac")), 
         id = factor(id), 
         language = factor(language))
  

str(freq)
## tibble[,7] [2,870 × 7] (S3: tbl_df/tbl/data.frame)
##  $ id            : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
##  $ activity      : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker       : chr [1:2870] "CHI" "ADULTS" "CHI" "ADULTS" ...
##  $ segment_num   : num [1:2870] 12 12 15 15 2 2 11 11 5 5 ...
##  $ language      : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tokens_permin2: num [1:2870] 8.46 42.57 5.32 21.75 12.31 ...
##  $ types_permin2 : num [1:2870] 4.79 19.73 2.59 9.89 3.61 ...
str(mlu)
## tibble[,6] [2,594 × 6] (S3: tbl_df/tbl/data.frame)
##  $ id         : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ activity   : Factor w/ 4 levels "books","cc","ac",..: 3 3 2 2 4 4 3 3 2 2 ...
##  $ speaker    : chr [1:2594] "ADULTS" "CHI" "ADULTS" "CHI" ...
##  $ segment_num: num [1:2594] 2 2 2 2 2 2 3 3 3 3 ...
##  $ language   : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ mlu_w2     : num [1:2594] 3.18 1.89 2.84 1.73 5.5 ...
str(chip)
## tibble[,6] [899 × 6] (S3: tbl_df/tbl/data.frame)
##  $ activity           : Factor w/ 3 levels "books","cc","ac": 3 2 3 2 3 2 2 3 2 3 ...
##  $ id                 : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ language           : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ segment_num        : num [1:899] 2 2 3 3 4 4 5 7 7 8 ...
##  $ prop_adultresp2    : num [1:899] 1.35 1.57 1.43 1.65 2.14 ...
##  $ prop_adult_imitexp2: num [1:899] 0.391 0.418 0.463 0.35 0.643 ...

Create dfs for ADULTS

# FREQ
# caregiver-only subset of the token/type rates (drops CHI rows)
freq_adult <- freq %>% 
  filter(speaker == "ADULTS")


# MLU
# caregiver-only subset of the MLU data
mlu_adult <- mlu %>% 
  filter(speaker == "ADULTS")

Create dfs averaging across segments for one obs per activity

Tokens and Types (rate per min)

# tokens and types: average per activity
# One row per caregiver x activity: rates averaged across that caregiver's
# segments. mutate() + distinct() (rather than summarise()) keeps the
# language column without listing it as a grouping variable.
freq_adult_per_activity_id <- freq_adult %>% 
  group_by(id, activity) %>% 
  mutate(tokens_permin_avg_act = mean(tokens_permin2), 
         types_permin_avg_act = mean(types_permin2)) %>% 
  distinct(id, activity, language, tokens_permin_avg_act, types_permin_avg_act) %>% 
  ungroup() %>% 
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")))

MLUw

# mlu: average per activity
# One row per caregiver x activity: MLUw averaged across segments.
mlu_adult_per_activity_id <- mlu_adult %>% 
  group_by(id, activity) %>% 
  mutate(mluw_avg_act = mean(mlu_w2)) %>% 
  distinct(id, activity, language, mluw_avg_act) %>% 
  ungroup() %>% 
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")))

Responses and Imitations/Expansions

# chip: average per activity
# One row per caregiver x activity: response and imitation/expansion
# proportions averaged across segments. na.rm = TRUE because some segments
# have missing proportions (spelled out: T/F are reassignable in R).
# ungroup() added so the result does not silently carry id/activity grouping
# into downstream pipelines.
chip_per_activity_id <- chip %>% 
  group_by(id, activity) %>% 
  mutate(prop_adultresp_avg_act = mean(prop_adultresp2, na.rm = TRUE), 
         prop_adult_imitexp_avg_act = mean(prop_adult_imitexp2, na.rm = TRUE)) %>% 
  distinct(id, activity, language, prop_adultresp_avg_act, prop_adult_imitexp_avg_act) %>% 
  ungroup()

Create wide dfs for matrices - tokens and types (rate)

# tokens
# all speech: one row per caregiver, one column per activity (tokens/min)
tokens_mtx_rate <- freq_adult_per_activity_id %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(id, language), 
              names_from = activity, 
              values_from = tokens_permin_avg_act) %>% 
  dplyr::select(language, books, cc, ac, non_tcds)



# types
# all speech: one row per caregiver, one column per activity (types/min)
types_mtx_rate <- freq_adult_per_activity_id %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(id, language), 
              names_from = activity, 
              values_from = types_permin_avg_act) %>% 
  dplyr::select(language, books, cc, ac, non_tcds)

Create wide dfs for matrices - mluw

# all speech: one row per caregiver, one column per activity (MLUw)
mlu_mtx <- mlu_adult_per_activity_id %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(id, language), 
              names_from = activity, 
              values_from = mluw_avg_act) %>% 
  dplyr::select(language, books, cc, ac, non_tcds)

Create wide dfs for matrix - responses and imit/exp

# prop responses
# all speech: one row per caregiver, one column per tCDS activity
# (chip has no non_tcds level)
propresp_mtx <- chip_per_activity_id %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(id, language), 
              names_from = activity, 
              values_from = prop_adultresp_avg_act) %>% 
  dplyr::select(language, books, cc, ac)


# prop imitations and expansions
# all speech: one row per caregiver, one column per tCDS activity
propimitexp_mtx <- chip_per_activity_id %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(id, language), 
              names_from = activity, 
              values_from = prop_adult_imitexp_avg_act) %>% 
  dplyr::select(language, books, cc, ac)

Zero-order correlations, one obs per ID (averaged across all activities)

# tokens and types rate
# One row per caregiver: rates averaged across that caregiver's per-activity
# averages (not weighted by segment counts).
# NOTE(review): result remains grouped by id; harmless for the joins below.
zero_corr_tokens_types <- freq_adult_per_activity_id %>% 
  ungroup() %>% 
  group_by(id) %>% 
  mutate(tokens_permin_avg_id = mean(tokens_permin_avg_act), 
         types_permin_avg_id = mean(types_permin_avg_act)) %>% 
  distinct(id, language, tokens_permin_avg_id, types_permin_avg_id)


# mlu
# One row per caregiver: MLUw averaged across the per-activity averages.
zero_corr_mlu <- mlu_adult_per_activity_id %>% 
  ungroup() %>% 
  group_by(id) %>% 
  mutate(mluw_mean_avg_id = mean(mluw_avg_act)) %>% 
  distinct(id, language, mluw_mean_avg_id)  


# chip
# One row per caregiver: response and imitation/expansion proportions
# averaged across the per-activity averages. na.rm = TRUE (spelled out;
# T/F are reassignable in R) because some activities can be missing for a
# caregiver.
zero_corr_chip <- chip_per_activity_id %>% 
  ungroup() %>% 
  group_by(id) %>% 
  mutate(propresp_avg_id = mean(prop_adultresp_avg_act, na.rm = TRUE), 
         propimitexp_avg_id = mean(prop_adult_imitexp_avg_act, na.rm = TRUE)) %>% 
  distinct(id, language, propresp_avg_id, propimitexp_avg_id) 



# combining tokens, types, mlu, and the chip measures into one wide df
# (one row per caregiver); joins are keyed on id + language
zero_corr_all_measures <- zero_corr_tokens_types %>% 
  full_join(zero_corr_mlu, by = c("id", "language")) %>% 
  full_join(zero_corr_chip, by = c("id", "language"))


# matrix
# pairs plot of the five caregiver-level measures (columns 3:7 = tokens/min,
# types/min, MLUw, prop responses, prop imit/exp)
ggpairs(data = zero_corr_all_measures, 
        columns = 3:7, 
        lower = list(continuous = my_custom_smooth), 
        upper = list(continuous = wrap("cor", size = 9)),
        title = "zero order corr (avg'd across all activities) - tokens, types, mlu") + 
  theme(text= element_text(size = 18))

# correlation matrices between verbal engagement measures
# all
# drop id/language so rcorr() sees a purely numeric matrix
zero_corr_all_measures2 <- zero_corr_all_measures %>% 
  ungroup %>% 
  dplyr::select(-c(id, language))

# Pearson r, n, and p for all pairs of measures (output echoed below)
rcorr(as.matrix(zero_corr_all_measures2), type = c("pearson"))
##                      tokens_permin_avg_id types_permin_avg_id mluw_mean_avg_id
## tokens_permin_avg_id                 1.00                0.89             0.62
## types_permin_avg_id                  0.89                1.00             0.42
## mluw_mean_avg_id                     0.62                0.42             1.00
## propresp_avg_id                      0.42                0.30             0.19
## propimitexp_avg_id                   0.24                0.16             0.20
##                      propresp_avg_id propimitexp_avg_id
## tokens_permin_avg_id            0.42               0.24
## types_permin_avg_id             0.30               0.16
## mluw_mean_avg_id                0.19               0.20
## propresp_avg_id                 1.00               0.42
## propimitexp_avg_id              0.42               1.00
## 
## n= 90 
## 
## 
## P
##                      tokens_permin_avg_id types_permin_avg_id mluw_mean_avg_id
## tokens_permin_avg_id                      0.0000              0.0000          
## types_permin_avg_id  0.0000                                   0.0000          
## mluw_mean_avg_id     0.0000               0.0000                              
## propresp_avg_id      0.0000               0.0036              0.0769          
## propimitexp_avg_id   0.0249               0.1218              0.0613          
##                      propresp_avg_id propimitexp_avg_id
## tokens_permin_avg_id 0.0000          0.0249            
## types_permin_avg_id  0.0036          0.1218            
## mluw_mean_avg_id     0.0769          0.0613            
## propresp_avg_id                      0.0000            
## propimitexp_avg_id   0.0000

Correlation Matrices - within each language

Tokens - Rate

# pairs plot of tokens/min across activities (both languages pooled)
ggpairs(data = tokens_mtx_rate, 
        columns = 2:5, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Tokens rate (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# sensitivity check: same plot excluding one extreme ac value
# NOTE(review): 530 appears to be a cutoff chosen to drop a single outlier --
# confirm against the data.
tokens_mtx_rate2 <- tokens_mtx_rate %>% filter(ac < 530)
ggpairs(data = tokens_mtx_rate2, 
        columns = 2:5, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Rm 1 outlier - Tokens rate (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# correlation matrices
# drop the language column so rcorr() sees a numeric matrix; pairwise n
# varies because books was only observed for a subset (see n table below)
tokens_mtx_rate_clean <- tokens_mtx_rate %>% 
  dplyr::select(-language)
rcorr(as.matrix(tokens_mtx_rate_clean), type = c("pearson"))
##          books   cc   ac non_tcds
## books     1.00 0.65 0.29     0.34
## cc        0.65 1.00 0.26     0.51
## ac        0.29 0.26 1.00     0.24
## non_tcds  0.34 0.51 0.24     1.00
## 
## n
##          books cc ac non_tcds
## books       42 42 42       42
## cc          42 90 90       90
## ac          42 90 90       90
## non_tcds    42 90 90       90
## 
## P
##          books  cc     ac     non_tcds
## books           0.0000 0.0592 0.0276  
## cc       0.0000        0.0121 0.0000  
## ac       0.0592 0.0121        0.0238  
## non_tcds 0.0276 0.0000 0.0238

Types - Rate

# pairs plot of types/min across activities (both languages pooled)
ggpairs(data = types_mtx_rate, 
        columns = 2:5, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Types rate (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# ggsave("./figures/corr_mtx_types_n90_paper.pdf", width = 11, height = 8, dpi = 300)


# sensitivity check: same plot excluding the same ac outlier as for tokens
types_mtx_rate2 <- types_mtx_rate %>% filter(ac < 530)
ggpairs(data = types_mtx_rate2, 
        columns = 2:5, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Rm 1 outlier - Types rate (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# correlation matrices
# numeric-only matrix for rcorr()
types_mtx_rate_clean <- types_mtx_rate %>% 
  dplyr::select(-language)
rcorr(as.matrix(types_mtx_rate_clean), type = c("pearson"))
##          books   cc   ac non_tcds
## books     1.00 0.23 0.13     0.00
## cc        0.23 1.00 0.08     0.22
## ac        0.13 0.08 1.00     0.19
## non_tcds  0.00 0.22 0.19     1.00
## 
## n
##          books cc ac non_tcds
## books       42 42 42       42
## cc          42 90 90       90
## ac          42 90 90       90
## non_tcds    42 90 90       90
## 
## P
##          books  cc     ac     non_tcds
## books           0.1377 0.3941 0.9904  
## cc       0.1377        0.4523 0.0354  
## ac       0.3941 0.4523        0.0738  
## non_tcds 0.9904 0.0354 0.0738

MLUw

# display labels for the paper figure (plot only; rcorr below uses raw names)
mlu_mtx2 <- mlu_mtx %>% 
  rename("Books" = "books", "Other Child-cent" = "cc", "Adult-cent" = "ac", "non-tCDS" = "non_tcds")

# pairs plot of MLUw across activities (both languages pooled)
ggpairs(data = mlu_mtx2, 
        columns = 2:5, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 11)),
        title = "MLUw (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 26),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 20), 
        strip.text.x = element_text(face = "bold", size = 20)) 

# save the paper version of this figure
ggsave("./figures/corr_mtx_mlu_n90_paper2.pdf", width = 15, height = 12, dpi = 300)

# correlation matrices
# numeric-only matrix for rcorr()
mlu_mtx_clean <- mlu_mtx %>% 
  dplyr::select(-language)
rcorr(as.matrix(mlu_mtx_clean), type = c("pearson"))
##          books   cc   ac non_tcds
## books     1.00 0.67 0.41     0.26
## cc        0.67 1.00 0.69     0.54
## ac        0.41 0.69 1.00     0.44
## non_tcds  0.26 0.54 0.44     1.00
## 
## n
##          books cc ac non_tcds
## books       42 42 42       42
## cc          42 90 90       90
## ac          42 90 90       90
## non_tcds    42 90 90       90
## 
## P
##          books  cc     ac     non_tcds
## books           0.0000 0.0066 0.0918  
## cc       0.0000        0.0000 0.0000  
## ac       0.0066 0.0000        0.0000  
## non_tcds 0.0918 0.0000 0.0000

PROP RESPONSES

# pairs plot of prop adult responses across tCDS activities (3 columns;
# chip has no non_tcds)
ggpairs(data = propresp_mtx, 
        columns = 2:4, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Prop Resp (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# ggsave("./figures/corr_mtx_propresp_n90_paper.pdf", width = 11, height = 8, dpi = 300)


# correlation matrices
# numeric-only matrix for rcorr(); pairwise n varies (see n table below)
propresp_mtx_clean <- propresp_mtx %>% 
  dplyr::select(-language)
rcorr(as.matrix(propresp_mtx_clean), type = c("pearson"))
##       books   cc   ac
## books  1.00 0.36 0.21
## cc     0.36 1.00 0.27
## ac     0.21 0.27 1.00
## 
## n
##       books cc ac
## books    42 42 39
## cc       42 89 86
## ac       39 86 87
## 
## P
##       books  cc     ac    
## books        0.0202 0.1959
## cc    0.0202        0.0114
## ac    0.1959 0.0114

PROP IMIT and EXP

# pairs plot of prop imitations/expansions across tCDS activities
ggpairs(data = propimitexp_mtx, 
        columns = 2:4, 
        switch = 'y', 
        lower = list(continuous = my_custom_smooth),
        upper = list(continuous = wrap("cor", size = 7)),
        title = "Prop Imit/Exp (English and Spanish)") + 
  theme_classic() + 
  theme(text= element_text(size = 18),
        strip.placement = "outside",
        strip.text.y = element_text(face = "bold", size = 15), 
        strip.text.x = element_text(face = "bold", size = 15)) 

# ggsave("./figures/corr_mtx_propimitexp_n90_paper.pdf", width = 11, height = 8, dpi = 300)


# correlation matrices
# numeric-only matrix for rcorr()
propimitexp_mtx_clean <- propimitexp_mtx %>% 
  dplyr::select(-language)
rcorr(as.matrix(propimitexp_mtx_clean), type = c("pearson"))
##       books   cc   ac
## books  1.00 0.32 0.02
## cc     0.32 1.00 0.31
## ac     0.02 0.31 1.00
## 
## n
##       books cc ac
## books    42 42 39
## cc       42 89 86
## ac       39 86 87
## 
## P
##       books  cc     ac    
## books        0.0400 0.9082
## cc    0.0400        0.0043
## ac    0.9082 0.0043

Forest Plot

# create dfs with correlations
# psych::corr.test() is used (rather than rcorr) because its $ci element
# carries confidence intervals for each pair; these feed the forest plot below
tokens_corr <- psych::corr.test(tokens_mtx_rate_clean)
types_corr <- psych::corr.test(types_mtx_rate_clean)
mlu_corr <- psych::corr.test(mlu_mtx_clean)
resp_corr <- psych::corr.test(propresp_mtx_clean)
imitexp_corr <- psych::corr.test(propimitexp_mtx_clean)



# get lower, r, upper, and p; lower and upper
# corr.test()$ci rownames look like "books-cc", with long names abbreviated
# (the recode below shows "non_tcds" comes back as "nn_tc"). Split each pair
# into x/y, undo the abbreviation, and tag the measure. The same extraction
# was previously copy-pasted five times; it now lives in one helper.
get_rcip <- function(corr_obj, measure_label) {
  tibble::rownames_to_column(round(corr_obj$ci, 3), "activity") %>%
    mutate(x = word(activity, 1, sep = "-"),
           y = word(activity, 2, sep = "-")) %>%
    mutate(measure = measure_label,
           # undo psych's abbreviation of "non_tcds"; a no-op for the chip
           # measures, which have no non_tcds column
           y = recode(y, "nn_tc" = "non_tcds")) %>%
    dplyr::select(-activity)
}

tokens_rcip  <- get_rcip(tokens_corr, "tokens_rate")
types_rcip   <- get_rcip(types_corr, "types_rate")
mluw_rcip    <- get_rcip(mlu_corr, "mluw")
resp_rcip    <- get_rcip(resp_corr, "prop_resp")
imitexp_rcip <- get_rcip(imitexp_corr, "prop_imit_exp")


# Combine the per-measure CI tables and relabel activities for display.
# The raw-code -> display-label mapping is defined once and spliced into
# recode() for both axes.
act_labels <- c("books" = "Books", "cc" = "Other Child-cent",
                "ac" = "Adult-cent", "non_tcds" = "non-tCDS")
act_levels <- c("Books", "Other Child-cent", "Adult-cent", "non-tCDS")

df_forest <- rbind(tokens_rcip, types_rcip, mluw_rcip, resp_rcip, imitexp_rcip) %>%
  mutate(x = recode(x, !!!act_labels),
         y = recode(y, !!!act_labels)) %>%
  mutate(x = factor(x, levels = act_levels),
         y = factor(y, levels = act_levels),
         measure = factor(measure, levels = c("tokens_rate", "types_rate",  "mluw",
                                              "prop_resp", "prop_imit_exp"))) %>%
  as_tibble()



# forest plot
# https://datascienceplus.com/lattice-like-forest-plot-using-ggplot2-in-r/
# One faceted panel per activity pair (x ~ y); each point range is one
# measure's r with its confidence interval.
ggplot(df_forest,
    aes(x = measure,y = r, ymin = lower, ymax = upper))+
    scale_colour_gradient(low = "green", high = "red") +
    geom_pointrange(aes(shape = measure, color = r), size = 1) +
    # grey band from r = .3 upward
    # NOTE(review): geom_rect inside aes() draws one rectangle per data row,
    # so the low alpha stacks; annotate() would draw it once -- confirm the
    # rendered shade is as intended
    geom_rect(aes(xmin = 0, xmax = Inf, ymin = Inf, ymax = .3),
                   fill = "grey", alpha = 0.08) +
    # geom_hline(yintercept = 0, linetype = 1) +
    geom_hline(yintercept = 0, linetype = 2) +
    xlab('Activity')+ ylab("\nPearson's r and Confidence Interval") +
    facet_grid(x ~ y, switch = "y") +
    theme_classic() +
    theme(plot.title = element_text(size = 16,face = "bold"),
          strip.placement = "outside",
          axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_text(size = 26),
        axis.title.y = element_text(vjust = 3),
        axis.text.y = element_text(size = 20),
        strip.text.y = element_text(face = "bold", size = 25),
        strip.text.x = element_text(face = "bold", size = 25)) +
  theme(legend.position = c(0.1, 0.35),
        legend.key.size = unit(1.5, 'cm'), 
        legend.text=element_text(size = 16), 
        legend.title = element_text(size = 16))

# save the paper version of the forest plot
ggsave("./figures/eff_sizes_4act2.pdf", width = 15, height = 12, dpi = 300)

Regressions to check for a language interaction — the cc x language interaction is non-significant (NS) for books ~ cc

Choosing books and the other child-centered category (cc, which includes play) since these have the most consistent positive and moderate-to-strong correlations across all input measures

Tokens - rate - books x cc

# books ~ cc
# Nested models: does language (and its interaction with cc) improve
# prediction of book-reading token rate from other child-centered token rate?
lm_tokens_rate1 <- lm(books ~ cc, data = tokens_mtx_rate)

lm_tokens_rate2 <- lm(books ~ cc + language, data = tokens_mtx_rate)

lm_tokens_rate3 <- lm(books ~ cc * language, data = tokens_mtx_rate)

# sequential F-tests of the nested models (output echoed below)
anova(lm_tokens_rate1, lm_tokens_rate2, lm_tokens_rate3)
## Analysis of Variance Table
## 
## Model 1: books ~ cc
## Model 2: books ~ cc + language
## Model 3: books ~ cc * language
##   Res.Df   RSS Df Sum of Sq      F   Pr(>F)   
## 1     40 18310                                
## 2     39 15190  1   3119.40 7.8119 0.008092 **
## 3     38 15174  1     16.57 0.0415 0.839669   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# summary
summary(lm_tokens_rate2)
## 
## Call:
## lm(formula = books ~ cc + language, data = tokens_mtx_rate)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.107 -11.667  -1.544  12.003  52.206 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      49.9838    10.9859   4.550 5.12e-05 ***
## cc                0.6769     0.1459   4.639 3.89e-05 ***
## languagespanish -18.1471     6.4125  -2.830  0.00732 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.74 on 39 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.5164, Adjusted R-squared:  0.4916 
## F-statistic: 20.82 on 2 and 39 DF,  p-value: 7.044e-07
# diag
plot_model(lm_tokens_rate1, type = "pred", terms = c("cc"),
           show.data = T,
           dot.size = 4)

Types - rate - books x cc

# books ~ cc
# Same nested-model comparison for types/min.
lm_types_rate1 <- lm(books ~ cc, data = types_mtx_rate)

lm_types_rate2 <- lm(books ~ cc + language, data = types_mtx_rate)

lm_types_rate3 <- lm(books ~ cc * language, data = types_mtx_rate)

# sequential F-tests of the nested models (output echoed below)
anova(lm_types_rate1, lm_types_rate2, lm_types_rate3)
## Analysis of Variance Table
## 
## Model 1: books ~ cc
## Model 2: books ~ cc + language
## Model 3: books ~ cc * language
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     40 3937.8                              
## 2     39 3556.1  1    381.67 4.0798 0.05049 .
## 3     38 3554.9  1      1.19 0.0127 0.91082  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# summary
summary(lm_types_rate1)
## 
## Call:
## lm(formula = books ~ cc, data = types_mtx_rate)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.170  -7.572  -1.044   4.320  21.398 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  24.0053     3.5119   6.836 3.18e-08 ***
## cc            0.2017     0.1332   1.515    0.138    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.922 on 40 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.05425,    Adjusted R-squared:  0.0306 
## F-statistic: 2.294 on 1 and 40 DF,  p-value: 0.1377
# diag
plot_model(lm_types_rate1, type = "pred", terms = c("cc"),
           show.data = T,
           dot.size = 4)

MLUw - books x cc

# books ~ cc
# Same nested-model comparison for MLUw.
lm_mluw1 <- lm(books ~ cc, data = mlu_mtx)

lm_mluw2 <- lm(books ~ cc + language, data = mlu_mtx)

lm_mluw3 <- lm(books ~ cc * language, data = mlu_mtx)

# sequential F-tests of the nested models (output echoed below)
anova(lm_mluw1, lm_mluw2, lm_mluw3)
## Analysis of Variance Table
## 
## Model 1: books ~ cc
## Model 2: books ~ cc + language
## Model 3: books ~ cc * language
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     40 41.749                           
## 2     39 41.542  1   0.20726 0.1965 0.6600
## 3     38 40.071  1   1.47103 1.3950 0.2449
# summary
summary(lm_mluw1)
## 
## Call:
## lm(formula = books ~ cc, data = mlu_mtx)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89878 -0.81209 -0.00908  0.55754  2.81095 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.5193     0.8698  -0.597    0.554    
## cc            1.3967     0.2445   5.711  1.2e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.022 on 40 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.4492, Adjusted R-squared:  0.4354 
## F-statistic: 32.62 on 1 and 40 DF,  p-value: 1.202e-06
# diag
plot_model(lm_mluw1, type = "pred", terms = c("cc"),
           show.data = T,
           dot.size = 4)

Prop Resp - books x cc

# books ~ cc
# Same nested-model comparison for proportion of adult responses.
lm_propresp1 <- lm(books ~ cc, data = propresp_mtx)

lm_propresp2 <- lm(books ~ cc + language, data = propresp_mtx)

lm_propresp3 <- lm(books ~ cc * language, data = propresp_mtx)

# sequential F-tests of the nested models (output echoed below)
anova(lm_propresp1, lm_propresp2, lm_propresp3)
## Analysis of Variance Table
## 
## Model 1: books ~ cc
## Model 2: books ~ cc + language
## Model 3: books ~ cc * language
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     40 15.508                           
## 2     39 14.947  1   0.56180 1.4527 0.2355
## 3     38 14.695  1   0.25115 0.6494 0.4253
# summary
summary(lm_propresp1)
## 
## Call:
## lm(formula = books ~ cc, data = propresp_mtx)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15412 -0.33585 -0.00945  0.32105  1.89345 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   1.1820     0.4551   2.598   0.0131 *
## cc            0.5838     0.2414   2.418   0.0202 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6227 on 40 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.1275, Adjusted R-squared:  0.1057 
## F-statistic: 5.847 on 1 and 40 DF,  p-value: 0.02025
# diag
# NOTE(review): this plots lm_propresp2 (language-adjusted) while the
# summary above reports lm_propresp1, and the other sections plot the same
# model they summarize -- confirm which model is intended here.
plot_model(lm_propresp2, type = "pred", terms = c("cc"),
           show.data = T,
           dot.size = 4)

Prop Imit/Exp - books x cc

# books ~ cc
# Same nested-model comparison for proportion of imitations/expansions.
lm_propimitexp1 <- lm(books ~ cc, data = propimitexp_mtx)

lm_propimitexp2 <- lm(books ~ cc + language, data = propimitexp_mtx)

lm_propimitexp3 <- lm(books ~ cc * language, data = propimitexp_mtx)

# sequential F-tests of the nested models (output echoed below)
anova(lm_propimitexp1, lm_propimitexp2, lm_propimitexp3)
## Analysis of Variance Table
## 
## Model 1: books ~ cc
## Model 2: books ~ cc + language
## Model 3: books ~ cc * language
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     40 1.8309                           
## 2     39 1.7641  1  0.066763 1.4407 0.2375
## 3     38 1.7610  1  0.003116 0.0672 0.7968
# summary
summary(lm_propimitexp1)
## 
## Call:
## lm(formula = books ~ cc, data = propimitexp_mtx)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32514 -0.18637 -0.01946  0.12972  0.64645 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.27359    0.08749   3.127  0.00328 **
## cc           0.43258    0.20376   2.123  0.03999 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2139 on 40 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.1013, Adjusted R-squared:  0.0788 
## F-statistic: 4.507 on 1 and 40 DF,  p-value: 0.03999
# diag
plot_model(lm_propimitexp1, type = "pred", terms = c("cc"),
           show.data = T,
           dot.size = 4)